package com.apobates.forum.utils;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * String search and word-counting utilities.
 *
 * <p>All members are static; the class cannot be instantiated.
 *
 * @author xiaofanku
 * @since 20200526
 */
public final class StringSeekUtils {
    private static final Logger logger = LoggerFactory.getLogger(StringSeekUtils.class);

    /** Splits text on runs of whitespace; compiled once instead of on every call. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Shared generator used by {@link #walkWord(String, int)} to pick a word. */
    private static final Random RANDOM = new Random();

    /** Maximum number of lines {@link #search(File)} reads after seeking. */
    private static final int SEARCH_LINE_COUNT = 3;

    private StringSeekUtils() {
        // Utility class: never instantiated.
        throw new UnsupportedOperationException("不需要实化公共工具类");
    }

    /**
     * Reads up to three consecutive lines from the given file, starting at a random byte offset,
     * and returns them concatenated (line terminators are not included and nothing is inserted
     * between the lines).
     *
     * <p>The file content is decoded as UTF-8. Because the start offset is random, the first line
     * is usually only the tail of a line. If the offset falls inside a multi-byte UTF-8 sequence,
     * reading starts at the next character boundary so no broken character is produced. Fewer than
     * three lines are returned when the end of the file is reached; an empty file yields an empty
     * string.
     *
     * @param directoryFile the file to read from
     * @return the concatenation of the lines read, possibly empty
     * @throws IOException if the file cannot be opened or read
     */
    public static String search(File directoryFile) throws IOException {
        StringBuilder sb = new StringBuilder();
        try (RandomAccessFile file = new RandomAccessFile(directoryFile, "r")) {
            long totalLen = file.length();
            if (totalLen == 0L) {
                return "";
            }
            long seekPos = Commons.randomNumericRange(0L, totalLen);
            logger.info("seek position: {}", seekPos);
            file.seek(skipUtf8ContinuationBytes(file, seekPos));
            for (int i = 0; i < SEARCH_LINE_COUNT; i++) {
                String raw = file.readLine();
                if (raw == null) {
                    // End of file reached before all lines could be read.
                    break;
                }
                // readLine() maps every byte to one char, so ISO-8859-1 restores the original
                // bytes, which are then decoded as UTF-8.
                sb.append(new String(raw.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8));
            }
        }
        return sb.toString();
    }

    /**
     * Returns the first offset at or after {@code start} whose byte is not a UTF-8 continuation
     * byte ({@code 10xxxxxx}), or the end-of-file offset if there is none.
     */
    private static long skipUtf8ContinuationBytes(RandomAccessFile file, long start) throws IOException {
        long pos = start;
        file.seek(pos);
        int b = file.read();
        while (b != -1 && (b & 0xC0) == 0x80) {
            pos++;
            b = file.read();
        }
        return pos;
    }

    /**
     * Counts how often each whitespace-separated word occurs in the given text. Words are compared
     * case-insensitively (lower-cased with {@link Locale#ROOT}); punctuation is not stripped.
     * Intended for English text only.
     *
     * @param sourceStr the text to analyse; must not be {@code null}
     * @return a map from lower-cased word to its number of occurrences; empty if the text contains
     *         no words
     * @throws NullPointerException if {@code sourceStr} is {@code null}
     */
    public static Map<String, Integer> wordFrequencyCount(String sourceStr) {
        Objects.requireNonNull(sourceStr);
        return WHITESPACE.splitAsStream(sourceStr)
                // Leading whitespace makes split emit an empty first token; it is not a word.
                .filter(token -> !token.isEmpty())
                .collect(Collectors.toMap(token -> token.toLowerCase(Locale.ROOT), token -> 1, Integer::sum));
    }

    /**
     * Picks, at random, one whitespace-separated word of the given text whose length is at least
     * {@code minLength}, and returns it lower-cased. Intended for English text only.
     *
     * @param sourceStr the text to pick from; must not be {@code null}
     * @param minLength the minimum length (inclusive) a word must have to be eligible
     * @return the chosen word in lower case, or an empty Optional if no word qualifies
     * @throws NullPointerException if {@code sourceStr} is {@code null}
     */
    public static Optional<String> walkWord(String sourceStr, int minLength) {
        Objects.requireNonNull(sourceStr);
        List<String> candidates = WHITESPACE.splitAsStream(sourceStr)
                .filter(token -> !token.isEmpty() && token.length() >= minLength)
                .collect(Collectors.toList());
        if (candidates.isEmpty()) {
            return Optional.empty();
        }
        if (logger.isInfoEnabled()) {
            logger.info("source rawdata: {}", String.join("|", candidates));
        }
        String data = candidates.get(RANDOM.nextInt(candidates.size())).toLowerCase(Locale.ROOT);
        logger.info("current word: {}", data);
        return Optional.of(data);
    }

    /**
     * Returns the leading run of ASCII letters ({@code a-z}, {@code A-Z}) of the given string.
     * Scanning stops at the first character that is not an ASCII letter, so everything from that
     * character on is dropped (for example {@code "word,"} gives {@code "word"} and
     * {@code "don't"} gives {@code "don"}). Intended for English text only.
     *
     * @param rndStr the string to clean; must not be {@code null}
     * @return the leading letters of {@code rndStr}, possibly empty
     */
    public static String clearWord(String rndStr) {
        logger.info("clear word: {}", rndStr);
        StringBuilder sb = new StringBuilder();
        for (char c : rndStr.toCharArray()) {
            boolean asciiLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
            if (!asciiLetter) {
                break;
            }
            sb.append(c);
        }
        return sb.toString();
    }

    /**
     * Counts the non-overlapping matches of {@code findWord} in {@code content} using
     * {@link Matcher#find()}.
     *
     * <p>Note that {@code findWord} is compiled as a regular expression, so regex metacharacters
     * in it are interpreted; pass {@link Pattern#quote(String)} output to search for literal text.
     *
     * @param content the text to search in
     * @param findWord the regular expression to look for; must not be {@code null}
     * @return the number of matches, or {@code -1} if {@code Commons.isNotBlank} rejects either
     *         argument
     * @throws NullPointerException if {@code findWord} is {@code null}
     * @throws java.util.regex.PatternSyntaxException if {@code findWord} is not a valid regex
     */
    public static int queryWordCountByExpReg(String content, String findWord) {
        Objects.requireNonNull(findWord);
        if (!Commons.isNotBlank(content)) {
            return -1;
        }
        if (!Commons.isNotBlank(findWord)) {
            return -1;
        }
        // See https://blog.csdn.net/lsylsy726/article/details/86697513
        int affect = 0;
        Matcher matcher = Pattern.compile(findWord).matcher(content);
        while (matcher.find()) {
            affect += 1;
        }
        return affect;
    }

    /**
     * Counts the occurrences of the literal string {@code findWord} in {@code content}. Every
     * start position is examined, so overlapping occurrences are counted (for example
     * {@code "aa"} occurs 3 times in {@code "aaaa"}).
     *
     * @param content the text to search in
     * @param findWord the literal text to look for; must not be {@code null}
     * @return the number of occurrences, or {@code -1} if {@code Commons.isNotBlank} rejects
     *         either argument
     * @throws NullPointerException if {@code findWord} is {@code null}
     */
    public static int queryWordCount(String content, String findWord) {
        Objects.requireNonNull(findWord);
        if (!Commons.isNotBlank(content)) {
            return -1;
        }
        if (!Commons.isNotBlank(findWord)) {
            return -1;
        }
        if (logger.isInfoEnabled()) {
            logger.info("[QWC][1]ref code: {}", String.join(",", stringToUnicode(findWord)));
        }
        int count = 0;
        // Advance by one after each hit so that overlapping occurrences are found as well.
        for (int at = content.indexOf(findWord); at >= 0; at = content.indexOf(findWord, at + 1)) {
            logger.info("[QWC][5]loop string: {}", findWord);
            count += 1;
        }
        return count;
    }

    /**
     * Converts each UTF-16 code unit of the string to its hexadecimal value.
     *
     * @param str the string to convert
     * @return one lower-case hex string per char, without zero padding (for example
     *         {@code 'a'} gives {@code "61"} and {@code '\u1357'} gives {@code "1357"})
     */
    private static List<String> stringToUnicode(String str) {
        List<String> struct = new ArrayList<>(str.length());
        for (char c : str.toCharArray()) {
            struct.add(Integer.toHexString(c));
        }
        return struct;
    }

    /**
     * Counts the occurrences of the literal string {@code findWord} in {@code content} using the
     * Boyer-Moore search implemented by {@link #searchPattern(String, String)}. Overlapping
     * occurrences are counted.
     *
     * @param content the text to search in
     * @param findWord the literal text to look for; must not be {@code null}
     * @return the number of occurrences, or {@code -1} if {@code Commons.isNotBlank} rejects
     *         either argument
     * @throws NullPointerException if {@code findWord} is {@code null}
     */
    public static int queryWordCountByBM(String content, String findWord) {
        Objects.requireNonNull(findWord);
        if (!Commons.isNotBlank(content)) {
            return -1;
        }
        if (!Commons.isNotBlank(findWord)) {
            return -1;
        }
        // Search the raw characters. Matching on concatenated, unpadded hex codes would also
        // match at positions that straddle or fall inside the codes of other characters.
        return searchPattern(content, findWord);
    }

    /**
     * Counts the occurrences of {@code pattern} in {@code mainString} with a Boyer-Moore search
     * that uses good-suffix shifts only (no bad-character table). Overlapping occurrences are
     * counted.
     *
     * @param mainString the text to search in; must not be {@code null}
     * @param pattern the literal text to look for; must not be {@code null}
     * @return the number of occurrences; {@code 0} if {@code pattern} is empty or longer than
     *         {@code mainString}
     * @throws NullPointerException if either argument is {@code null}
     */
    public static int searchPattern(String mainString, String pattern) {
        Objects.requireNonNull(mainString, "mainString");
        Objects.requireNonNull(pattern, "pattern");
        int patLen = pattern.length();
        int strLen = mainString.length();
        if (patLen == 0 || patLen > strLen) {
            return 0;
        }
        // Both tables have one slot per suffix start position 0..patLen; Java zero-fills them.
        int[] borderArray = new int[patLen + 1];
        int[] shiftArray = new int[patLen + 1];
        fullSuffixMatch(shiftArray, borderArray, pattern);
        partialSuffixMatch(shiftArray, borderArray, pattern);

        int index = 0;
        int shift = 0;
        while (shift <= (strLen - patLen)) {
            // Compare right to left; j ends at the mismatch position, or -1 on a full match.
            int j = patLen - 1;
            while (j >= 0 && pattern.charAt(j) == mainString.charAt(shift + j)) {
                j--;
            }
            if (j < 0) {
                index++;
                shift += shiftArray[0];
            } else {
                shift += shiftArray[j + 1];
            }
        }
        return index;
    }

    /**
     * First preprocessing pass: computes the border position of every suffix of the pattern into
     * {@code borderArr} and records in {@code shiftArr} the shifts for suffixes that re-occur
     * in the pattern preceded by a different character. Entries left at 0 are filled in by
     * {@link #partialSuffixMatch(int[], int[], String)}.
     */
    private static void fullSuffixMatch(int[] shiftArr, int[] borderArr, String pattern) {
        int n = pattern.length();
        int i = n;
        int j = n + 1;
        borderArr[i] = j;

        while (i > 0) {
            // Walk the border chain while the characters to the left of both borders differ.
            while (j <= n && pattern.charAt(i - 1) != pattern.charAt(j - 1)) {
                if (shiftArr[j] == 0) {
                    shiftArr[j] = j - i;
                }
                j = borderArr[j];
            }
            i--;
            j--;
            borderArr[i] = j;
        }
    }

    /**
     * Second preprocessing pass: every shift still at 0 is set to the widest border of the
     * pattern that fits in the matched suffix.
     */
    private static void partialSuffixMatch(int[] shiftArr, int[] borderArr, String pattern) {
        int n = pattern.length();
        int j = borderArr[0];

        // The bound is inclusive: shiftArr[n] is the shift used when the last pattern character
        // mismatches. Leaving it at 0 would make searchPattern loop forever.
        for (int i = 0; i <= n; i++) {
            if (shiftArr[i] == 0) {
                shiftArr[i] = j;
            }
            if (i == j) {
                j = borderArr[j];
            }
        }
    }
}